In this notebook, we estimate LGM monthly mean precipitation and minimum and maximum precipitation in the western Mediterranean by statistically a downscaling General Circulation Model (GCM) paleoclimate simulation. We first calibrate a Genralized Additive Model (GAM) using observed WorldClim climatologies, DEM-derived topographic variables, and simulations of present day climates using NCAR’s CESM/CCSM4 model. Then we apply this model to CCSM4 simulations of the LGM PMIP3 experiments.
##Preparation ###Setup
First load the necessary R packages.
library(raster) # functions for managing and processing raster data
library(mgcv) # functions to fit and analyze GAMs
library(dismo) # functions to sample points weighted by latitude
library(magrittr) # piping functions for code readability
library(rasterVis) # functions for visualizing raster maps
library(ggplot2) # functions for plotting
library(reshape2)
Import the observed present-day climatologies which we’ll use to calibrate the model. We use WorldClim data at 5min resolution here, which can be easily downloaded using the getData() function in the raster package.
get.wc <- function(var){
raster::getData('worldclim', var=var, res = 5, download = T) %>%
crop(extent(-20, 40, 30, 58)) %>%
set_names(month.name)
}
obs.prc <- get.wc('prec')
obs.tmx <- get.wc('tmax') %>% divide_by(10) # raw data in degrees Celsius * 10
obs.tmn <- get.wc('tmin') %>% divide_by(10)
import.gcm <- function(dir, var, sim = 'lgm', level = 1){
gcm.in <- brick(dir, var = var, level = level)
if(sim == 'control'){gcm.in <- gcm.in[[1200:1799]]}
if(sim == 'lgm'){gcm.in <- gcm.in[[4213:4812]]}
gcm.in <- stackApply(gcm.in, indices = 1:12, fun = mean)
extent(gcm.in) <- extent(0, 360, -90, 90)
gcm.in %>%
rotate %>%
projectRaster(obs.prc) %>%
mask(obs.prc)
}
control.prc <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.PRECT.185001-200512.nc',
var = "PRECT",
sim = 'control') %>% multiply_by(2629743830)
## Loading required namespace: ncdf4
control.tmx <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.TREFMXAV.185001-200512.nc',
var = "TREFMXAV",
sim = 'control') %>% subtract(273.15)
control.tmn <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.TREFMNAV.185001-200512.nc',
var = "TREFMNAV",
sim = 'control') %>% subtract(273.15)
control.psl <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.PSL.185001-200512.nc',
var = "PSL",
sim = 'control') %>% divide_by(1000)
control.q <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.QREFHT.185001-200512.nc',
var = "QREFHT",
sim = 'control')
control.u <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.U.185001-200512.nc',
var = "U",
sim = 'control',
level = 26)
control.v <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.V.185001-200512.nc',
var = "V",
sim = 'control',
level = 26)
control.z <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.Z3.185001-200512.nc',
var = "Z3",
sim = 'control',
level = 23) # ~850 geopotential
Generate some topographic predictors from a DEM.
Start with importing a DEM, then calculate aspect and slope maps using the terrain function.
elev <- raster('alt_5m_bil/alt.bil') %>% projectRaster(obs.prc)
aspect <- elev %>% terrain(opt='aspect', unit = 'degrees')
slope <- elev %>% terrain(opt = 'slope', unit = 'degrees')
Calculate the euclidean distance to the ocean, accounting for latitude effects.
dco <- raster('alt_5m_bil/alt.bil') %>% # import WorldClim 5m dem
crop(extent(-20, 45, 30, 65)) %>% # start with a wider region to get accurate distances
reclassify(c(-Inf, Inf, NA, NA, NA, 1)) %>% # reverse NA and non-NA cells
distance(doEdge = T) %>% # calculate the distances
projectRaster(elev) %>%
mask(elev) %>% # mask out ocean cells
divide_by(1000) # convert to km
streamplot(stack(control.u[[1]], control.v[[1]]), isField = 'dXY')
streamplot(stack(control.u[[7]], control.v[[7]]), isField = 'dXY')
dir <- (270 - overlay(control.v, control.u, fun = atan2) * (180 / pi)) %% 360
vel <- sqrt(control.u ^ 2 + control.v ^ 2)
delta <- abs(dir - aspect) #distance between wind angle and slope orientation
values(delta) <- ifelse(values(delta) > 180, 360 - values(delta), values(delta))
vg <- sin(slope * 2 * pi / 180) * cos(delta * pi / 180) * vel * .5
We can expect the model errors to vary by month and by location (i.e. spatially and temporally autocorrelated residuals). Let’s include these variables in the model fitting.
month <- obs.prc %>% setValues(c(as.factor(rep(1:12, each = 241920)))) # each should = ncell(vg)
lat <- elev %>%
coordinates %>%
extract( ,2) %>%
setValues(elev, .)
lon <- elev %>%
coordinates %>%
extract( ,1) %>%
setValues(elev, .)
Put everything together.
cal.vars <- sapply(1:12, function(x){
brick(obs.tmx[[x]], obs.tmn[[x]], obs.prc[[x]], control.tmx[[x]],
control.tmn[[x]], control.q[[x]], control.prc[[x]], control.psl[[x]],
control.z[[x]], elev, dco, vg[[x]], month[[x]], lat, lon) %>%
setNames(c('obs.tmx', 'obs.tmn', 'obs.prc','TREFMXAV', 'TREFMNAV', 'Q', 'PRC',
'PSL', 'Z3', 'elev','dco', 'vg', 'month', 'lat','lon'))
})
rm(control.tmx, control.tmn, control.q,control.prc,control.psl,control.z,control.v,control.u,vg,dir,vel,delta)
Sample the variables at random points, weighting for area distortions due to latitude
cal.data <- lapply(cal.vars, function(x) (raster::extract(x, randomPoints(elev, 20000)) %>% data.frame)) %>%
do.call(rbind, .)
let’s start with temperature
fit.tmn <- gam(obs.tmn ~ s(TREFMNAV, bs = 'cr') +
s(Z3, bs = 'cr') +
s(elev, bs = 'cr') +
s(dco, bs = 'cr') +
s(month, bs = 're'),
method = 'REML', data = cal.data)
fit.tmn
##
## Family: gaussian
## Link function: identity
##
## Formula:
## obs.tmn ~ s(TREFMNAV, bs = "cr") + s(Z3, bs = "cr") + s(elev,
## bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
##
## Estimated degrees of freedom:
## 8.97 8.94 7.81 8.50 1.00 total = 36.22
##
## REML score: 602412.5
summary(fit.tmn)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## obs.tmn ~ s(TREFMNAV, bs = "cr") + s(Z3, bs = "cr") + s(elev,
## bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.55892 0.01581 667.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(TREFMNAV) 8.972 9.000 87931.6 <2e-16 ***
## s(Z3) 8.945 8.998 5995.8 <2e-16 ***
## s(elev) 7.806 8.491 6897.5 <2e-16 ***
## s(dco) 8.499 8.898 370.1 <2e-16 ***
## s(month) 1.000 1.000 69044.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.834 Deviance explained = 83.4%
## -REML = 6.0241e+05 Scale est. = 9.7257 n = 235613
gam.check(fit.tmn)
##
## Method: REML Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-0.07719612,0.07719018]
## (score 602412.5 & scale 9.725685).
## Hessian positive definite, eigenvalue range [0.577168,117804.1].
## Model rank = 38 / 38
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(TREFMNAV) 9.000 8.972 1.016 0.90
## s(Z3) 9.000 8.945 0.997 0.42
## s(elev) 9.000 7.806 1.006 0.66
## s(dco) 9.000 8.499 1.001 0.58
## s(month) 1.000 1.000 0.281 0.00
plot(fit.tmn, shade=T, seWithMean = T, pages = 1)
fit.tmx <- gam(obs.tmx ~ s(TREFMXAV, bs = 'cr') +
s(Z3, bs = 'cr') +
s(elev, bs = 'cr') +
s(dco, bs = 'cr') +
s(month, bs = 're'),
method = 'REML', data = cal.data)
fit.tmx
##
## Family: gaussian
## Link function: identity
##
## Formula:
## obs.tmx ~ s(TREFMXAV, bs = "cr") + s(Z3, bs = "cr") + s(elev,
## bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
##
## Estimated degrees of freedom:
## 8.93 8.95 8.73 7.85 1.00 total = 36.47
##
## REML score: 627857.2
summary(fit.tmx)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## obs.tmx ~ s(TREFMXAV, bs = "cr") + s(Z3, bs = "cr") + s(elev,
## bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.63470 0.01736 1304 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(TREFMXAV) 8.932 8.998 138320 <2e-16 ***
## s(Z3) 8.954 8.999 5021 <2e-16 ***
## s(elev) 8.734 8.967 5004 <2e-16 ***
## s(dco) 7.849 8.546 187 <2e-16 ***
## s(month) 1.000 1.000 144117 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.88 Deviance explained = 88%
## -REML = 6.2786e+05 Scale est. = 12.07 n = 235613
gam.check(fit.tmn)
##
## Method: REML Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-0.07719612,0.07719018]
## (score 602412.5 & scale 9.725685).
## Hessian positive definite, eigenvalue range [0.577168,117804.1].
## Model rank = 38 / 38
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(TREFMNAV) 9.000 8.972 1.004 0.62
## s(Z3) 9.000 8.945 1.000 0.52
## s(elev) 9.000 7.806 0.997 0.50
## s(dco) 9.000 8.499 0.985 0.16
## s(month) 1.000 1.000 0.286 0.00
plot(fit.tmx, shade = T, seWithMean = T, pages = 1)
fit.prc.occur <- gam(factor(obs.prc >= 1) ~ s(PRC),
family = binomial, method = 'REML', data = cal.data)
summary(fit.prc.occur)
##
## Family: binomial
## Link function: logit
##
## Formula:
## factor(obs.prc >= 1) ~ s(PRC)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 639.3 693.4 0.922 0.357
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(PRC) 6.427 6.748 3204 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.157 Deviance explained = 38.1%
## -REML = 20146 Scale est. = 1 n = 235613
gam.check(fit.prc.occur)
##
## Method: REML Optimizer: outer newton
## full convergence after 18 iterations.
## Gradient range [0.06175008,0.06175008]
## (score 20145.75 & scale 1).
## Hessian positive definite, eigenvalue range [4.338675,4.338675].
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(PRC) 9.000 6.427 0.947 0
plot(fit.prc.occur, seWithMean = T, shade = T, pages = 1)
fit.prc <- bam(obs.prc ~ s(PRC, bs = 'cr') +
s(PSL, bs = 'cr') +
s(Q, bs = 'cr') +
s(vg, bs = 'cr') +
s(Z3, bs = 'cr') +
s(elev, bs = 'cr') +
s(dco, bs = 'cr') +
s(month, bs = 're'),
family = Gamma(link = 'log'), method = 'REML', data = cal.data[cal.data$obs.prc>=1,])
fit.prc
##
## Family: Gamma
## Link function: log
##
## Formula:
## obs.prc ~ s(PRC, bs = "cr") + s(PSL, bs = "cr") + s(Q, bs = "cr") +
## s(vg, bs = "cr") + s(Z3, bs = "cr") + s(elev, bs = "cr") +
## s(dco, bs = "cr") + s(month, bs = "re")
##
## Estimated degrees of freedom:
## 8.99 8.98 8.67 8.58 8.95 8.32 8.37
## 1.00 total = 62.85
##
## REML score: 126205.7
summary(fit.prc)
##
## Family: Gamma
## Link function: log
##
## Formula:
## obs.prc ~ s(PRC, bs = "cr") + s(PSL, bs = "cr") + s(Q, bs = "cr") +
## s(vg, bs = "cr") + s(Z3, bs = "cr") + s(elev, bs = "cr") +
## s(dco, bs = "cr") + s(month, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.523807 0.002563 1375 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(PRC) 8.9948 9.000 49653.63 <2e-16 ***
## s(PSL) 8.9754 9.000 2287.80 <2e-16 ***
## s(Q) 8.6718 8.957 636.45 <2e-16 ***
## s(vg) 8.5785 8.882 48.02 <2e-16 ***
## s(Z3) 8.9454 8.998 894.90 <2e-16 ***
## s(elev) 8.3169 8.798 1060.49 <2e-16 ***
## s(dco) 8.3674 8.830 1086.49 <2e-16 ***
## s(month) 0.9999 1.000 6687.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.633 Deviance explained = 63.6%
## -REML = 1.2621e+05 Scale est. = 0.18603 n = 217970
gam.check(fit.prc)
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-6.225455e-06,4.819515e-06]
## (score 126205.7 & scale 0.1860329).
## Hessian positive definite, eigenvalue range [0.4998529,108981].
## Model rank = 65 / 65
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(PRC) 9.000 8.995 0.975 0.38
## s(PSL) 9.000 8.975 0.973 0.32
## s(Q) 9.000 8.672 0.956 0.04
## s(vg) 9.000 8.578 0.993 0.78
## s(Z3) 9.000 8.945 0.950 0.02
## s(elev) 9.000 8.317 0.965 0.12
## s(dco) 9.000 8.367 0.955 0.04
## s(month) 1.000 1.000 0.928 0.00
plot(fit.prc, shade = T, seWithMean = T, pages = 1)
lgm.tmx <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.TREFMXAV.149901-189912.nc',
var = "TREFMXAV") %>% subtract(273.15)
lgm.tmn <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.TREFMNAV.149901-189912.nc',
var = "TREFMNAV") %>% subtract(273.15)
lgm.q <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.QREFHT.149901-189912.nc',
var = "QREFHT")
lgm.psl <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PSL.149901-189912.nc',
var = "PSL") %>% divide_by(1000)
lgm.prcc <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PRECC.149901-189912.nc',
var = "PRECC") %>% multiply_by(2629743830)
lgm.prcl <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PRECL.149901-189912.nc',
var = "PRECL") %>% multiply_by(2629743830)
lgm.prc <- lgm.prcl + lgm.prcc
lgm.u <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.U.149901-189912.nc',
var = "U",
level = 26)
lgm.v <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.V.149901-189912.nc',
var = "V",
level = 26)
lgm.z <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.Z3.149901-189912.nc',
var = "Z3", level = 23)
lgm.dir <- (270 - overlay(lgm.v, lgm.u, fun = atan2) * (180 / pi)) %% 360
lgm.vel <- sqrt(lgm.u ^ 2 + lgm.v ^ 2)
lgm.delta <- abs(lgm.dir - aspect)
values(lgm.delta) <- ifelse(values(lgm.delta) > 180, 360 - values(lgm.delta), values(lgm.delta))
lgm.vg <- sin(slope * 2 * pi / 180) * cos(lgm.delta * pi / 180) * lgm.vel * .5
pred.vars <- sapply(1:12, function(x){
brick(lgm.tmx[[x]], lgm.tmn[[x]], lgm.prc[[x]], lgm.psl[[x]], lgm.q[[x]], lgm.z[[x]], elev, dco, lgm.vg[[x]], month[[x]]) %>%
crop(extent(-10, 20, 36, 46)) %>%
setNames(c('TREFMXAV', 'TREFMNAV', 'PRC', 'PSL', 'Q','Z3', 'elev','dco', 'vg','month'))
})
rm(lgm.tmn,lgm.tmx,lgm.z,lgm.q,lgm.v,lgm.u,lgm.prcc,lgm.prcl,lgm.vg,lgm.prc,lgm.dir,lgm.psl,lgm.vel,lgm.delta)
tmn.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.tmn, type = 'response')}) %>% brick
tmx.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.tmx, type = 'response')}) %>% brick
prc.occur.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.prc.occur, type = 'response')}) %>% brick
prc.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.prc, type = 'response')}) %>% brick
prc.recon <- mask(prc.recon, prc.occur.recon>.7, maskvalue=0, update.value=0)